/*******************************************************************************

This code file estimates underassessment ratios for condos.

*******************************************************************************/

*** Manage settings

	* Execute the project settings script (run = do without echoing output).
	* Presumably this is where the $data global used below is defined -- TODO confirm.
	run "~/Dropbox (MIT)/Research/NYC421a/code/modules/settings.do"
	
	set more off
	clear

*** Load data
	
	* Transaction-level sales file; replaces whatever is in memory
	use "$data/clean/all_transactions.dta", clear
	
	* Drop likely non-arms-length transactions 
	* (sale price below 25,000). Rows with a missing saleprice are NOT dropped
	* here, because Stata treats missing as larger than any number.
	drop if saleprice < 25000
	
*** Estimate underassessment
	
	* Step 1: Correct prices for time trend (using repeat sales)
	*
	* Regress log sale price on year dummies, absorbing bbl fixed effects and
	* using only rows with residentialunits == 1. The fitted year profile is
	* then used to express every sale price at the 2019 level.
				
		reghdfe lsaleprice i.year if residentialunits == 1, a(bbl)
		
		* xb is the linear prediction from the year dummies only; the absorbed
		* bbl effects are not included in it
		predict timeadjustment, xb
		
		* Give every row of a year that year's (largest non-missing) prediction
		bys year: gegen timeadjustment_ = max(timeadjustment)
		replace timeadjustment = timeadjustment_
		drop timeadjustment_
			
		* Baseline = prediction for 2019. Rows from other years are set to
		* missing (ignored by max) instead of being multiplied by 0, so that
		* they cannot enter the max as zeros and mask a negative or absent
		* 2019 value.
		gegen baseline = max(cond(year == 2019, timeadjustment, .))
		
		* Stop here if there is no usable 2019 prediction; otherwise every
		* adjusted price below would be silently wrong or missing
		assert !missing(baseline)
		
		* Log-point shift that moves each year's prices to the 2019 level
		replace timeadjustment = baseline - timeadjustment
		
		gen lsaleprice_adj = lsaleprice + timeadjustment
		gen saleprice_adj = exp(lsaleprice_adj)
	
	* Keep only the earliest sale of each unit (bbl x apartmentnumber)

		* Number the sales of every unit in saledate order, then discard all
		* but the first one
		bysort bbl apartmentnumber (saledate): gen transaction_number = _n
		drop if transaction_number > 1
		
	* Remove parcels identified as erroneous during manual review
	
		drop if inlist(bbl, 1020441113, 3003041003, 1003771202, 1003771203, 2026187501)
		
	* Attach the main (PLUTO) datafile
		
		* From here on the merge key bbl holds the former bbl_condo value;
		* the original identifier is preserved as bbl_old
		rename (bbl bbl_condo) (bbl_old bbl)
		merge m:1 bbl using "$data/clean/pluto_merged.dta", nogenerate

	* Step 2: Compute underassessment ratio
	
	* Count a row as one residential unit when its unit counts are zero ...
	replace residentialunits = 1 if max(residentialunits,totalunits) == 0
	* ... or when neither residential nor commercial units are reported
	replace residentialunits = 1 if missing(residentialunits) & missing(commercialunits)
	
	* Discard rows that have commercial units but no residential ones
	drop if residentialunits == 0 & commercialunits > 0 & commercialunits < .
	
	* One row per bbl: maxima for bbl-level attributes, totals for sale
	* values, unit counts and areas
	collapse (max) assesstot unitsres taxclassattimeofsale condo ///
		(sum) saleprice_adj residentialunits resarea bldgarea, ///
		by(bbl)
	
	* Step 3: Apply corrections to the transactions data
	* (update replace: values from the error file overwrite those in memory;
	*  bbls that appear only in the error file are not added)
	merge 1:1 bbl using "$data/raw/error_files/errors_transaction_data.dta", keep(1 3 4 5) update replace nogenerate
	
	* Rebuild the ratio from scratch in case a variable of that name already exists
	cap drop underassess
	
	* Divisor 0.06 applies to tax class 1 and 0.45 to tax class 2 (presumably
	* the statutory assessment ratios -- TODO confirm); any other tax class
	* is left missing
	gen underassess = ((resarea/bldgarea)*assesstot/(cond(taxclassattimeofsale == 1, 0.06, 0.45)*unitsres/residentialunits))/saleprice_adj if inlist(taxclassattimeofsale, 1, 2)
	
	* Step 4: Cap outliers at 1.5 (likely non-arms-length transactions);
	* missing ratios stay missing
	replace underassess = min(underassess, 1.5) if !missing(underassess)
	
	* Restrict to condos; the flag is constant afterwards, so remove it
	drop if condo != 1
	drop condo
	
*** Save datafile

	save "$data/raw/assessments/underassessment.dta", replace
			
